library(stargazer)
library(data.table)
library(lfe)
library(ivreg)
library(starpolishr)
library(dplyr)
library(ggplot2)
library(maps)
library(elections)
library(lubridate)
library(DescTools)

rm(list=ls())
# setwd("")

fData <- fread("Final_data_counties_stata_new_unemployment.csv")


################ Table 1: Summary statistics ###########
#select relevant variables
fSumStat <- fData[,c("Change_frac_dem","Protest_bin", "Days_of_protests","PRCP_total","Estimate_low_tot_pop_100")]
fSumStat_protest <- fData[Tot_protests>0,c("Change_frac_dem","Days_of_protests","PRCP_total","Estimate_low_tot_pop_100")]

#create relevant sum stats
lSumStat <- lapply(fSumStat, function(x){
  temp <- data.frame(Mean = mean(x),Min = min(x),Max= max(x))
  return(temp)
})

#create relevant sum stats for protest counties
lSumStat_protest <- lapply(fSumStat_protest, function(x){
  temp <- data.frame(Mean = mean(x),Min = min(x), Max= max(x))
  return(temp)
})

#combine
fSumStat <- do.call(rbind.data.frame,lSumStat)
fSumStat_protest <- do.call(rbind.data.frame,lSumStat_protest)
fSumStat_both <- rbind.data.frame(fSumStat,fSumStat_protest)

#set the right order
fSumStat_both$Order <- c(1,3,6,4,8,2,7,5,9)
fSumStat_both <- fSumStat_both[order(fSumStat_both$Order),] %>% dplyr::select(-Order)
colnames(fSumStat_both) <- c("Mean","Min","Max")
rownames(fSumStat_both) <- c("Change Democratic vote share","Change Democratic vote share, protest counties","Protest county","Rainfall","Rainfall, protest counties", "Days of protests","Days of protests, protest counties",
                             "Protest attendees/Population","Protest attendees/Population, protest counties")

fSumStat_tex <- stargazer(fSumStat_both,
                          summary=F,
                          omit.summary.stat = "N",
                          digits=2,
                          median=T,
                          font.size = "scriptsize", 
                          label = "Summary_stats",
                          title = "Summary Statistics",
                          # covariate.labels = c("Change in Democratic vote share",
                          #                      "Protests (Yes/No)",
                          #                      "Number of protests",
                          #                      "Rainfall",
                          #                      "Number of protesters (1000s)",
                          #                      "Number of protesters (1000s, high estimate)"),
                          table.placement = "H")
write.table(fSumStat_tex,file="Tables/Summary_stats.tex",
            row.names= FALSE,na="", quote = FALSE, col.names = FALSE)



############# Figure A1: Protest attendance over time #############

vAttendance_vars <- colnames(fData)[grepl("Attendance_day",colnames(fData))]
fData_attendance <- fData[,..vAttendance_vars]
fData_attendance <- melt(fData_attendance)
fGraph_attendance <- fData_attendance[,.(Total = sum(value)), by=variable]
fGraph_attendance$Date <- as.Date(dmy("26-05-2020"):dmy("07-06-2020"))

vProtests_vars <- colnames(fData)[grepl("Protests_day",colnames(fData))]
fData_Protests <- fData[,..vProtests_vars]
fData_Protests <- as.data.frame(sapply(fData_Protests, function(x) as.numeric(x>0))) #convert protests into protest days
fData_Protests <- as.data.table(fData_Protests)
fData_Protests <- melt(fData_Protests)
fGraph_Protests <- fData_Protests[,.(Total = sum(value)), by=variable]
fGraph_Protests$Date <- as.Date(dmy("26-05-2020"):dmy("07-06-2020"))

fGraph_both <- rbind(fGraph_attendance,fGraph_Protests)
fGraph_both <- fGraph_both[,Total_adj := ifelse(grepl("Attendance",variable),Total/500,Total)]
fGraph_both <- fGraph_both[,Outcome := ifelse(grepl("Attendance",variable),"Attendance","Protests")]
fGraph_both <- fGraph_both[,Outcome := factor(Outcome, levels=c("Protests","Attendance"))]

pdf("Graphs/Graph_data_time.pdf")
ggplot(fGraph_both, aes(x=Date, y=Total_adj, color=Outcome)) +
  geom_point() +
  geom_line() +
  theme_bw() +
  scale_y_continuous(name="Protests\n",sec.axis = sec_axis(trans = ~.*500,name="Attendees\n", labels = scales::comma))+
  xlab("")+
  theme(text=element_text(size=15), legend.position = "bottom", legend.title = element_blank())
dev.off()



# # ############## Figures A2, A3 and A4: county maps (This no longer works because the underlying package is discontinued, so it is commented out) ##################
# data(counties)
# data(states)
# fCounties <- data.table(counties)
# fCounties <- fCounties[,County := substr(county_name, 1,(nchar(county_name)-7))]
# fCounties <- fCounties[,County_state := paste0(County, " ", state_name)]
# 
# fGraph_election <- left_join(fData, fCounties, by = "County_state")
# 
# 
# pdf("Graphs/Graph_election_US.pdf")
# ggplot(fGraph_election, aes(long, lat,group=group, fill=Frac_dem_2020))+
#   geom_polygon(color=NA) +
#   coord_map(projection = "albers", lat0 = 39, lat1 = 45) +
#   labs(fill = "Democratic vote share") +
#   scale_fill_gradient(labels = scales::percent_format(accuracy=1),
#                       low = "red",high = "blue") +
#   geom_polygon(data = states, mapping = aes(long, lat, group = group),
#                fill = NA, color = "#ffffff") +
#   # scale_fill_manual(values = c("lightblue","red")) +
#   labs(fill = "Democratic vote share") +
#   theme_bw() +
#   theme(legend.title = element_text(),
#         legend.key.width = unit(.5, "in"),
#         axis.text  = element_blank(),
#         axis.ticks = element_blank(),
#         axis.title = element_blank()) +
#   xlab("") +
#   ylab("")
# dev.off()
# 
# 
# 
# fGraph_protests <- left_join(fData, fCounties, by = "County_state")
# fGraph_protests <- fGraph_protests %>% mutate(Any_protests = factor(Tot_protests>0, levels = c(T,F)))
# 
# 
# pdf("Graphs/Graph_protests_US.pdf")
# ggplot(fGraph_protests, aes(long, lat,group=group, fill=as.integer(Days_of_protests)))+
#   geom_polygon(color=NA) +
#   coord_map(projection = "albers", lat0 = 39, lat1 = 45) +
#   labs(fill = "Days of protests") +
#   scale_fill_continuous(labels=seq(0,14,3),breaks=seq(0,14,3))+
#   # scale_fill_manual(values = c("grey28","grey"))+
#   geom_polygon(data = states, mapping = aes(long, lat, group = group),
#                fill = NA, color = "#ffffff") +
#   # scale_fill_manual(values = c("lightblue","red")) +
#   theme_bw() +
#   theme(legend.title = element_text(),
#         legend.key.width = unit(.5, "in"),
#         axis.text  = element_blank(),
#         axis.ticks = element_blank(),
#         axis.title = element_blank()) +
#   xlab("") +
#   ylab("")
# dev.off()
# 
# 
# 
# fGraph_attendance <- left_join(fData, fCounties, by = "County_state")
# fGraph_attendance <- fGraph_attendance %>% mutate(Any_attendance = factor(Tot_attendance>0, levels = c(T,F)))
# 
# 
# pdf("Graphs/Graph_attendance_US.pdf")
# ggplot(fGraph_attendance, aes(long, lat,group=group, fill=as.integer(Days_of_attendance)))+
#   geom_polygon(color=NA) +
#   coord_map(projection = "albers", lat0 = 39, lat1 = 45) +
#   labs(fill = "Days of attendance") +
#   scale_fill_continuous(labels=seq(0,14,3),breaks=seq(0,14,3))+
#   # scale_fill_manual(values = c("grey28","grey"))+
#   geom_polygon(data = states, mapping = aes(long, lat, group = group),
#                fill = NA, color = "#ffffff") +
#   # scale_fill_manual(values = c("lightblue","red")) +
#   theme_bw() +
#   theme(legend.title = element_text(),
#         legend.key.width = unit(.5, "in"),
#         axis.text  = element_blank(),
#         axis.ticks = element_blank(),
#         axis.title = element_blank()) +
#   xlab("") +
#   ylab("")
# dev.off()
# 
# 
# #re-make rainfall graph. Winsorize at 99%
# fGraph_rainfall <- fGraph_election[,Rainfall_wins := Winsorize(PRCP_total, probs = c(0, 0.99))]
# 
# 
# pdf("Graphs/Graph_rainfall_US.pdf")
# ggplot(fGraph_election, aes(long, lat,group=group, fill=as.numeric(Rainfall_wins)))+
#   geom_polygon(color=NA) +
#   coord_map(projection = "albers", lat0 = 39, lat1 = 45) +
#   labs(fill = "Rainfall") +
#   scale_fill_continuous(labels=seq(0,14,3),breaks=seq(0,14,3))+
#   # scale_fill_manual(values = c("grey28","grey"))+
#   geom_polygon(data = states, mapping = aes(long, lat, group = group),
#                fill = NA, color = "#ffffff") +
#   # scale_fill_manual(values = c("lightblue","red")) +
#   theme_bw() +
#   theme(legend.title = element_text(),
#         legend.key.width = unit(.5, "in"),
#         axis.text  = element_blank(),
#         axis.ticks = element_blank(),
#         axis.title = element_blank()) +
#   xlab("") +
#   ylab("")
# dev.off()
# 
# 
# 
# 




########### Table A3, Panel D: Results for main regression without spatial adjustment ###############
Main_outcome2_main_1 <- ivreg(Change_frac_dem~Estimate_low_tot_pop_100+Av_rainprob+TotalPop2019_100000+White_2019+Black_2019+Hispanic_2019+Asian_2019|PRCP_total+Av_rainprob+TotalPop2019_100000+White_2019+Black_2019+Hispanic_2019+Asian_2019, data=fData,x=T)
Main_outcome2_main_2 <- ivreg(Change_frac_dem~Estimate_low_tot_pop_100+Av_rainprob+TotalPop2019_100000+White_2019+Black_2019+Hispanic_2019+Asian_2019+Median_age_2019+Less_than_highschool_frac+Highschool_frac+Some_college_frac+Bachelor_frac+Graduate_frac|PRCP_total+Av_rainprob+TotalPop2019_100000+White_2019+Black_2019+Hispanic_2019+Asian_2019+Median_age_2019+Less_than_highschool_frac+Highschool_frac+Some_college_frac+Bachelor_frac+Graduate_frac, data=fData,x=T)
Main_outcome2_main_3 <- ivreg(Change_frac_dem~Estimate_low_tot_pop_100+Av_rainprob+TotalPop2019_100000+White_2019+Black_2019+Hispanic_2019+Asian_2019+Median_age_2019+Less_than_highschool_frac+Highschool_frac+Some_college_frac+Bachelor_frac+Graduate_frac+Median_income_2019+Unemployment_2019|PRCP_total+Av_rainprob+TotalPop2019_100000+White_2019+Black_2019+Hispanic_2019+Asian_2019+Median_age_2019+Less_than_highschool_frac+Highschool_frac+Some_college_frac+Bachelor_frac+Graduate_frac+Median_income_2019+Unemployment_2019, data=fData,x=T)


Reg_main_secondstage <- stargazer(Main_outcome2_main_1,Main_outcome2_main_2,Main_outcome2_main_3,
                                  intercept.bottom = F,
                                  label = "Results_secondstage",
                                  omit.stat = "all",
                                  covariate.labels = c("Attendees/Population","Rain prob.","Population (100,000s)"),
                                  column.labels  = c("Model 1","Model 2","Model 3"),
                                  model.numbers = F,
                                  omit = c("Constant", "White_2019","Black_2019","Asian_2019","Hispanic_2019","Median_age_2019","Median_income_2019","Unemployment_2019", "Less_than_highschool_frac", "Highschool_frac", "Some_college_frac", "Bachelor_frac", "Graduate_frac" ,"Constant"),
                                  title = "The Effect of BLM Protests on the 2020 Presidential Election",
                                  table.placement = "H",
                                  font.size = "scriptsize",
                                  dep.var.labels  ="Change in Democratic vote share",
                                  digits = 3,
                                  dep.var.caption  ="",
                                  add.lines = list(c("Observations",length(Main_outcome2_main_1$fitted.values),length(Main_outcome2_main_2$fitted.values),length(Main_outcome2_main_3$fitted.values)),
                                                   c("First-stage F-stat",round(summary(Main_outcome2_main_1,diagnostics=T)$diagnostics[1,3],2), round(summary(Main_outcome2_main_2,diagnostics=T)$diagnostics[1,3],2), round(summary(Main_outcome2_main_3,diagnostics=T)$diagnostics[1,3],2)),
                                                   c("Demographic controls","No","Yes","Yes"),
                                                   c("Economic controls","No","No","Yes")
                                  )
)

write.table(Reg_main_secondstage,file="Tables/Reg_main_secondstage_new.tex",row.names= FALSE,na="", quote = FALSE, col.names = FALSE)





############# Figure A6: placebo graph ##################
#reduced form period 1
fTrial_exclusion1 <- readxl::read_xlsx("Tables/Results_exclusion_1.xlsx",sheet=1)
fTrial_exclusion2 <- readxl::read_xlsx("Tables/Results_exclusion_2.xlsx",sheet=1)
fTrial_exclusion3 <- readxl::read_xlsx("Tables/Results_exclusion_3.xlsx",sheet=1)
fTrial_exclusion4 <- readxl::read_xlsx("Tables/Results_exclusion_4.xlsx",sheet=1)
fTrial_exclusion5 <- readxl::read_xlsx("Tables/Results_exclusion_5.xlsx",sheet=1)
fTrial_exclusion6 <- readxl::read_xlsx("Tables/Results_exclusion_6.xlsx",sheet=1)
fTrial_exclusion7 <- readxl::read_xlsx("Tables/Results_exclusion_7.xlsx",sheet=1)
fTrial_exclusion8 <- readxl::read_xlsx("Tables/Results_exclusion_8.xlsx",sheet=1)
fTrial_exclusion9 <- readxl::read_xlsx("Tables/Results_exclusion_9.xlsx",sheet=1)

fTrial_exclusion1 <- as.data.frame(fTrial_exclusion1)
fTrial_exclusion2 <- as.data.frame(fTrial_exclusion2)
fTrial_exclusion3 <- as.data.frame(fTrial_exclusion3)
fTrial_exclusion4 <- as.data.frame(fTrial_exclusion4)
fTrial_exclusion5 <- as.data.frame(fTrial_exclusion5)
fTrial_exclusion6 <- as.data.frame(fTrial_exclusion6)
fTrial_exclusion7 <- as.data.frame(fTrial_exclusion7)
fTrial_exclusion8 <- as.data.frame(fTrial_exclusion8)
fTrial_exclusion9 <- as.data.frame(fTrial_exclusion9)


#get coefficients
fTrial_exclusion1_coefs_exclusion1 <- as.numeric(fTrial_exclusion1[2,which(colnames(fTrial_exclusion1)%in%c("change_frac_dem...2","change_frac_dem...3","change_frac_dem...4","lambda","rho"))])
names(fTrial_exclusion1_coefs_exclusion1) <- c("PRCP_total","Av_rainprob","TotalPop2019_100000","Longitude","Latitude")
fTrial_exclusion2_coefs_exclusion2 <- as.numeric(fTrial_exclusion2[2,which(colnames(fTrial_exclusion2)%in%c("change_frac_dem...2","change_frac_dem...3","change_frac_dem...4","lambda","rho"))])
names(fTrial_exclusion2_coefs_exclusion2) <- c("PRCP_total","Av_rainprob","TotalPop2019_100000","Longitude","Latitude")
fTrial_exclusion3_coefs_exclusion3 <- as.numeric(fTrial_exclusion3[2,which(colnames(fTrial_exclusion3)%in%c("change_frac_dem...2","change_frac_dem...3","change_frac_dem...4","lambda","rho"))])
names(fTrial_exclusion3_coefs_exclusion3) <- c("PRCP_total","Av_rainprob","TotalPop2019_100000","Longitude","Latitude")
fTrial_exclusion4_coefs_exclusion4 <- as.numeric(fTrial_exclusion4[2,which(colnames(fTrial_exclusion4)%in%c("change_frac_dem...2","change_frac_dem...3","change_frac_dem...4","lambda","rho"))])
names(fTrial_exclusion4_coefs_exclusion4) <- c("PRCP_total","Av_rainprob","TotalPop2019_100000","Longitude","Latitude")
fTrial_exclusion5_coefs_exclusion5 <- as.numeric(fTrial_exclusion5[2,which(colnames(fTrial_exclusion5)%in%c("change_frac_dem...2","change_frac_dem...3","change_frac_dem...4","lambda","rho"))])
names(fTrial_exclusion5_coefs_exclusion5) <- c("PRCP_total","Av_rainprob","TotalPop2019_100000","Longitude","Latitude")
fTrial_exclusion6_coefs_exclusion6 <- as.numeric(fTrial_exclusion6[2,which(colnames(fTrial_exclusion6)%in%c("change_frac_dem...2","change_frac_dem...3","change_frac_dem...4","lambda","rho"))])
names(fTrial_exclusion6_coefs_exclusion6) <- c("PRCP_total","Av_rainprob","TotalPop2019_100000","Longitude","Latitude")
fTrial_exclusion7_coefs_exclusion7 <- as.numeric(fTrial_exclusion7[2,which(colnames(fTrial_exclusion7)%in%c("change_frac_dem...2","change_frac_dem...3","change_frac_dem...4","lambda","rho"))])
names(fTrial_exclusion7_coefs_exclusion7) <- c("PRCP_total","Av_rainprob","TotalPop2019_100000","Longitude","Latitude")
fTrial_exclusion8_coefs_exclusion8 <- as.numeric(fTrial_exclusion8[2,which(colnames(fTrial_exclusion8)%in%c("change_frac_dem...2","change_frac_dem...3","change_frac_dem...4","lambda","rho"))])
names(fTrial_exclusion8_coefs_exclusion8) <- c("PRCP_total","Av_rainprob","TotalPop2019_100000","Longitude","Latitude")
fTrial_exclusion9_coefs_exclusion9 <- as.numeric(fTrial_exclusion9[2,which(colnames(fTrial_exclusion9)%in%c("change_frac_dem...2","change_frac_dem...3","change_frac_dem...4","lambda","rho"))])
names(fTrial_exclusion9_coefs_exclusion9) <- c("PRCP_total","Av_rainprob","TotalPop2019_100000","Longitude","Latitude")


#get SEs
fTrial_exclusion1_se_exclusion1 <- as.numeric(fTrial_exclusion1[3,which(colnames(fTrial_exclusion1)%in%c("change_frac_dem...2","change_frac_dem...3","change_frac_dem...4","lambda","rho"))])
names(fTrial_exclusion1_se_exclusion1) <- c("PRCP_total","Av_rainprob","TotalPop2019_100000","Longitude","Latitude")
fTrial_exclusion2_se_exclusion2 <- as.numeric(fTrial_exclusion2[3,which(colnames(fTrial_exclusion2)%in%c("change_frac_dem...2","change_frac_dem...3","change_frac_dem...4","lambda","rho"))])
names(fTrial_exclusion2_se_exclusion2) <- c("PRCP_total","Av_rainprob","TotalPop2019_100000","Longitude","Latitude")
fTrial_exclusion3_se_exclusion3 <- as.numeric(fTrial_exclusion3[3,which(colnames(fTrial_exclusion3)%in%c("change_frac_dem...2","change_frac_dem...3","change_frac_dem...4","lambda","rho"))])
names(fTrial_exclusion3_se_exclusion3) <- c("PRCP_total","Av_rainprob","TotalPop2019_100000","Longitude","Latitude")
fTrial_exclusion4_se_exclusion4 <- as.numeric(fTrial_exclusion4[3,which(colnames(fTrial_exclusion4)%in%c("change_frac_dem...2","change_frac_dem...3","change_frac_dem...4","lambda","rho"))])
names(fTrial_exclusion4_se_exclusion4) <- c("PRCP_total","Av_rainprob","TotalPop2019_100000","Longitude","Latitude")
fTrial_exclusion5_se_exclusion5 <- as.numeric(fTrial_exclusion5[3,which(colnames(fTrial_exclusion5)%in%c("change_frac_dem...2","change_frac_dem...3","change_frac_dem...4","lambda","rho"))])
names(fTrial_exclusion5_se_exclusion5) <- c("PRCP_total","Av_rainprob","TotalPop2019_100000","Longitude","Latitude")
fTrial_exclusion6_se_exclusion6 <- as.numeric(fTrial_exclusion6[3,which(colnames(fTrial_exclusion6)%in%c("change_frac_dem...2","change_frac_dem...3","change_frac_dem...4","lambda","rho"))])
names(fTrial_exclusion6_se_exclusion6) <- c("PRCP_total","Av_rainprob","TotalPop2019_100000","Longitude","Latitude")
fTrial_exclusion7_se_exclusion7 <- as.numeric(fTrial_exclusion7[3,which(colnames(fTrial_exclusion7)%in%c("change_frac_dem...2","change_frac_dem...3","change_frac_dem...4","lambda","rho"))])
names(fTrial_exclusion7_se_exclusion7) <- c("PRCP_total","Av_rainprob","TotalPop2019_100000","Longitude","Latitude")
fTrial_exclusion8_se_exclusion8 <- as.numeric(fTrial_exclusion8[3,which(colnames(fTrial_exclusion8)%in%c("change_frac_dem...2","change_frac_dem...3","change_frac_dem...4","lambda","rho"))])
names(fTrial_exclusion8_se_exclusion8) <- c("PRCP_total","Av_rainprob","TotalPop2019_100000","Longitude","Latitude")
fTrial_exclusion9_se_exclusion9 <- as.numeric(fTrial_exclusion9[3,which(colnames(fTrial_exclusion9)%in%c("change_frac_dem...2","change_frac_dem...3","change_frac_dem...4","lambda","rho"))])
names(fTrial_exclusion9_se_exclusion9) <- c("PRCP_total","Av_rainprob","TotalPop2019_100000","Longitude","Latitude")




#make graph
fExclusion_main_coefs_all <- data.table(Period = "Main",
                                        PRCP_total_coef = fTrial_exclusion5_coefs_exclusion5["PRCP_total"],
                                        PRCP_total_se = fTrial_exclusion5_se_exclusion5["PRCP_total"])



fExclusion_coefs_all <- rbind(fTrial_exclusion1_coefs_exclusion1, fTrial_exclusion2_coefs_exclusion2, fTrial_exclusion3_coefs_exclusion3, fTrial_exclusion4_coefs_exclusion4, fTrial_exclusion5_coefs_exclusion5, fTrial_exclusion6_coefs_exclusion6,
                              fTrial_exclusion7_coefs_exclusion7,fTrial_exclusion8_coefs_exclusion8,fTrial_exclusion9_coefs_exclusion9)
fExclusion_coefs_all <- as.data.table(fExclusion_coefs_all)[,Period := 1:9]
fExclusion_coefs_all <- fExclusion_coefs_all[,c("PRCP_total","Period")]


fExclusion_se_all <- rbind(fTrial_exclusion1_se_exclusion1, fTrial_exclusion2_se_exclusion2, fTrial_exclusion3_se_exclusion3, fTrial_exclusion4_se_exclusion4, fTrial_exclusion5_se_exclusion5, fTrial_exclusion6_se_exclusion6,
                           fTrial_exclusion7_se_exclusion7,fTrial_exclusion8_se_exclusion8,fTrial_exclusion9_se_exclusion9)
fExclusion_se_all <- as.data.table(fExclusion_se_all)[,Period := 1:9]
fExclusion_se_all <- fExclusion_se_all[,c("PRCP_total","Period")]




fExclusion_both_all <- merge(fExclusion_coefs_all, fExclusion_se_all, by="Period")
colnames(fExclusion_both_all) <- gsub(".x","_coef", colnames(fExclusion_both_all))
colnames(fExclusion_both_all) <- gsub(".y","_se", colnames(fExclusion_both_all))
fExclusion_both_all <- rbind(fExclusion_both_all,fExclusion_main_coefs_all)

fExclusion_both_all <- fExclusion_both_all[,PRCP_total_ci_lower := PRCP_total_coef -1.96*PRCP_total_se]
fExclusion_both_all <- fExclusion_both_all[,PRCP_total_ci_upper := PRCP_total_coef +1.96*PRCP_total_se]
fExclusion_both_all <- fExclusion_both_all[,Min_date := c(dmy("08-03-2020"),
                                                          dmy("21-03-2020"),
                                                          dmy("03-04-2020"),
                                                          dmy("16-04-2020"),
                                                          dmy("29-04-2020"),
                                                          dmy("12-05-2020"),
                                                          
                                                          dmy("08-06-2020"),dmy("21-06-2020"),dmy("04-07-2020"),
                                                          dmy("25-05-2020"))]

fExclusion_both_all <- fExclusion_both_all[,Max_date := c(dmy("20-03-2020"),
                                                          dmy("02-04-2020"),
                                                          dmy("15-04-2020"),
                                                          dmy("28-04-2020"),
                                                          dmy("11-05-2020"),
                                                          dmy("07-05-2020"),
                                                          dmy("20-06-2020"),dmy("03-07-2020"),dmy("16-07-2020"),dmy("24-05-2020"))]

fExclusion_both_all <- fExclusion_both_all[order(Min_date)]
fExclusion_both_all <- fExclusion_both_all[,Period_no := 1:10]
fExclusion_both_all <- fExclusion_both_all[,Min_date_formatted := format(Min_date, "%d-%b")]


#same graph, but with points and error bars
pdf("Graphs/Graph_exclusion_all_points.pdf")
ggplot(fExclusion_both_all, aes(x=Period_no, y=PRCP_total_coef )) +
  geom_point() +
  geom_line()+
  geom_hline(yintercept = 0)+
  geom_errorbar(aes(ymin=PRCP_total_ci_lower,ymax=PRCP_total_ci_upper), position=position_dodge(width=0.9), width=0.5) +
  theme_bw() +
  theme(text=element_text(size=15), axis.text.x = element_text(angle = 90, vjust = 0.5)) +
  ylab("Estimate") +
  scale_x_continuous(name="Start of sample",labels=fExclusion_both_all$Min_date_formatted, breaks=1:10)
dev.off()








############### Figure A5: graph expanding window (with spatial autocorrelation) ###############
fTrial_expanding_attendees <- readxl::read_xlsx("Tables/Results_expanding_wind.xlsx")
colnames(fTrial_expanding_attendees) <- c("Var","Coefficient","CI_lower","CI_upper")
fTrial_expanding_attendees <- as.data.table(fTrial_expanding_attendees)
fTrial_expanding_attendees <- fTrial_expanding_attendees[,Window_length := 4:13]




pdf("Graphs/Expanding_window_spatial.pdf")
ggplot(fTrial_expanding_attendees[Window_length>=6], aes(x=Window_length,y=Coefficient)) +
  geom_point() +
  geom_line() +
  geom_ribbon(aes(ymin=CI_lower, ymax=CI_upper), alpha=0.1) +
  theme_bw() +
  # scale_fill_manual(values=c("blue","yellow"))+
  # scale_color_manual(values=c("blue","yellow"))+
  # labs(fill="Outcome variable",color="Outcome variable")+
  geom_hline(yintercept = 0)+
  scale_x_continuous(name = "End of sample period",minor_breaks = seq(5,13,1),  breaks = seq(5,13,1), labels = c(paste0(30:31," May"),paste0(1:7, " June"))) +
  scale_y_continuous(name = "Estimated coefficient") +
  theme(text=element_text(size=17), axis.text.x = element_text(angle=270),legend.position = "bottom",legend.title = element_blank())
dev.off()









########## Footnote 9: F-stat without spatial autocorrelation ###########

library(OneSampleMR)

test <- ivreg::ivreg(Change_frac_dem~Estimate_low_tot_pop_100+Av_rainprob+TotalPop2019_100000+White_2019+Black_2019+Hispanic_2019+Asian_2019|PRCP_total+Av_rainprob+TotalPop2019_100000+White_2019+Black_2019+Hispanic_2019+Asian_2019, data=fData,x=T)

test2 <- felm(Change_frac_dem~Av_rainprob+TotalPop2019_100000+White_2019+Black_2019+Hispanic_2019+Asian_2019|
                0|(Estimate_low_tot_pop_100 ~ PRCP_total), data=fData)

first_stage <- felm(Estimate_low_tot_pop_100~PRCP_total+Av_rainprob+TotalPop2019_100000+White_2019+Black_2019+Hispanic_2019+Asian_2019,data=fData)
first_stage_rest <- felm(Estimate_low_tot_pop_100~Av_rainprob+TotalPop2019_100000+White_2019+Black_2019+Hispanic_2019+Asian_2019,data=fData)


SSE_full = sum(first_stage$residuals^2)
SSE_rest = sum(first_stage_rest$residuals^2)


#((SSE1-SSE2)/(df_r - df_f))/(SSE2/(df_f))
((SSE_rest-SSE_full)/(first_stage_rest$df-first_stage$df)/(SSE_full/(first_stage$df)))



############### Table A5: weigh counties by population size, but no spatial adjustment ##############
Main_outcome2_weighted_1 <- ivreg(Change_frac_dem~Estimate_low_tot_pop_100+Av_rainprob+TotalPop2019_100000+White_2019+Black_2019+Hispanic_2019+Asian_2019|PRCP_total+Av_rainprob+TotalPop2019_100000+White_2019+Black_2019+Hispanic_2019+Asian_2019, data=fData,x=T, weights = TotalPop2019_100000)
Main_outcome2_weighted_2 <- ivreg(Change_frac_dem~Estimate_low_tot_pop_100+Av_rainprob+TotalPop2019_100000+White_2019+Black_2019+Hispanic_2019+Asian_2019+Median_age_2019+Less_than_highschool_frac+Highschool_frac+Some_college_frac+Bachelor_frac+Graduate_frac|PRCP_total+Av_rainprob+TotalPop2019_100000+White_2019+Black_2019+Hispanic_2019+Asian_2019+Median_age_2019+Less_than_highschool_frac+Highschool_frac+Some_college_frac+Bachelor_frac+Graduate_frac, data=fData,x=T, weights = TotalPop2019_100000)
Main_outcome2_weighted_3 <- ivreg(Change_frac_dem~Estimate_low_tot_pop_100+Av_rainprob+TotalPop2019_100000+White_2019+Black_2019+Hispanic_2019+Asian_2019+Median_age_2019+Less_than_highschool_frac+Highschool_frac+Some_college_frac+Bachelor_frac+Graduate_frac+Median_income_2019+Unemployment_2019|PRCP_total+Av_rainprob+TotalPop2019_100000+White_2019+Black_2019+Hispanic_2019+Asian_2019+Median_age_2019+Less_than_highschool_frac+Highschool_frac+Some_college_frac+Bachelor_frac+Graduate_frac+Median_income_2019+Unemployment_2019, data=fData,x=T, weights = TotalPop2019_100000)


Reg_weighted_secondstage <- stargazer(Main_outcome2_weighted_1,Main_outcome2_weighted_2,Main_outcome2_weighted_3,
                                      intercept.bottom = F,
                                      label = "Results_secondstage_weighted",
                                      omit.stat = "all",
                                      covariate.labels = c("Attendees/Population","Rain prob.","Population (100,000s)"),
                                      column.labels  = c("Model 1","Model 2","Model 3"),
                                      model.numbers = F,
                                      omit = c("Constant", "White_2019","Black_2019","Asian_2019","Hispanic_2019","Median_age_2019","Median_income_2019","Unemployment_2019", "Less_than_highschool_frac", "Highschool_frac", "Some_college_frac", "Bachelor_frac", "Graduate_frac" ,"Constant"),
                                      title = "The Effect of BLM Protests on the 2020 Presidential Election, weighted, no spatial adjustment",
                                      table.placement = "H",
                                      font.size = "scriptsize",
                                      dep.var.labels  ="Change in Democratic vote share",
                                      digits = 3,
                                      dep.var.caption  ="",
                                      add.lines = list(c("Observations",length(Main_outcome2_weighted_1$fitted.values),length(Main_outcome2_weighted_2$fitted.values),length(Main_outcome2_weighted_3$fitted.values)),
                                                       c("First-stage F-stat",round(summary(Main_outcome2_weighted_1,diagnostics=T)$diagnostics[1,3],2), round(summary(Main_outcome2_weighted_2,diagnostics=T)$diagnostics[1,3],2), round(summary(Main_outcome2_weighted_3,diagnostics=T)$diagnostics[1,3],2)),
                                                       c("Demographic controls","No","Yes","Yes"),
                                                       c("Economic controls","No","No","Yes")
                                      )
)


